# Estimate Cost-Effectiveness and Welfare Impacts of Policy Scenarios
# TODO:
# 

pacman::p_load(data.table, tidyverse, sf, cowplot, kableExtra, modelsummary, scales, fst, tinytable, pbmcapply)

rm(list = ls())
source("code/globals.R")

# Global assumptions -----
VOLUNTARY_TAKEUP_RATE <- 0.40 # based on Champ et al JEBO

# Assumptions we used in JPE submission
# BETA_SELF <- 0.135
# BETA_NEIGHBOR <- 0.022

# Assumptions we use after updates made for JPE revisions
BETA_SELF <- 0.133 # Table 1, column 3, row 2: SRA 2008--2016
BETA_NEIGHBOR <- 0.017 - 0.006 # Table 2, column 1, 1 pre-code neighbor MINUS 1 post-code neighbor estimate

LOSS_PROB_NO_MITIGATION <- 0.46 # Assumed loss risk for a home with no neighbors and no mitigation investment. See assumptions in paper.

MEASURE_LIFETIME <- 40
DISCOUNT_RATE <- 0.05

# Implications of assumptions
# Reduced probability of loss from mitigation
ownriskreduction <- BETA_SELF / (1 - VOLUNTARY_TAKEUP_RATE)
neighborriskreduction <- BETA_NEIGHBOR / (1 - VOLUNTARY_TAKEUP_RATE)

# Probability of destruction if mitigation occurs
destprob <- LOSS_PROB_NO_MITIGATION - ownriskreduction 

# Load sample homes -----
samplehomes <- read_fst(file.path(WORKING, "statewide-costeff-sample.fst"), as.data.table = T)


# Cost effectiveness holding rebuild / mitigation costs fixed ----

### HW report that their model build is a 2500 sq foot, single story home with a 2-car garage of typical
### quality that was built for $140/sq ft. The closest corresponding construction category in HAZUS is "Custom", which has
### a national average cost per square foot of $159 (recognizing also that MT may also have below-national-avg construction costs).

sqft <- 2500
mainpersqft <- 159.51
garageadder <- 34319
main <- mainpersqft * sqft + garageadder
contents <- 0.5 * main
temphousing <- 0.83 * 24 * sqft ### Temporary housing and disruption costs Page 6-14 of HAZUS Technical manual (Assuming 24 months of temp housing). Note I apply R.S. Means Location Factors here too, as a rough way of capturing high rental costs in CA.
disruption <- 0.99 * sqft

unadj <- main + contents + temphousing + disruption

locationfactors <- read.csv(file.path(INPUT_PUBLIC, "HAZUS", "StateData", "Exported", "MeansAdjFactor.txt")) %>%
  select(CountyFIPS, MeansAdjRes) %>%
  mutate(CountyFIPS = sprintf("%05d", CountyFIPS)) # Match FIPS formatting

AvgLocAdj <- samplehomes %>%
  mutate(CountyFIPS = as.character(FIPS)) %>%
  count(CountyFIPS) %>%
  merge(locationfactors, by = "CountyFIPS") %>%
  dplyr::summarize(MeansAdjFactor = weighted.mean(MeansAdjRes, w = n))

adjustedlosses <- unadj * AvgLocAdj
adjustedlosses <- adjustedlosses$MeansAdjFactor

ConstantCaliforniaLoss <- adjustedlosses + 150000 # hazardous waste removal

rm(sqft, mainpersqft, garageadder, contents, temphousing, disruption, unadj, locationfactors, AvgLocAdj, main, adjustedlosses)

# Compute average number of near neighbors in a representative home ----

# Average number of near neighbors (around 2.5)
avgneighbors <- mean(samplehomes$neighbors_in_30m_centroid, na.rm = T)

# Functions to calculate cost effectiveness ----

### For expected utility calculations
w0 <- 1000000 # permanent income

## CRRA utility function
u <- function(c, gamma) {
  if (gamma == 1) {
    return(log(c))
  } else {
    return((c^(1 - gamma)) / (1 - gamma))
  }
}

get_PDV <- function(values, lifetime, discount_rate) {
  # Get Present Discounted Value of a constant flow of values x
  # (product + rowSums version, around 60x faster than original)
  # values: numeric vector, each one is a constant flow
  # lifetime: numeric vector, periods over when to compute PDV
  # discount_rate: discount rate 
  
  discounts <- 1 / (rep(1 + discount_rate, lifetime)^(0:(lifetime - 1)))
  
  # Outer product + rowSums is the fastest way to do this I've found so far
  # Better than original, pure mtx multiplication or sapply
  # Should be a vector the same length as values
  pdv <- rowSums(outer(values, discounts))
  
  pdv
}

get_PDV(rnorm(1e5, 10000, 1000), lifetime = 40, discount_rate = 0.05)

# Calculate what fire probability would yield a cost effectiveness of zero (breakeven point) ----


breakeven <- function(m, loss = ConstantCaliforniaLoss, 
                      pD = destprob, tau_ii = ownriskreduction, tau_ij = neighborriskreduction, NumNb = avgneighbors, 
                      lifetime = MEASURE_LIFETIME, i = DISCOUNT_RATE, 
                      shareinsured = 0.6, crra = 0) {
  # crra: numeric vector, coefficient of relative risk aversion
  # calcEU: logical, should it do the expected utility version of calculation? (Slow).
  
  # pD = destprob; lifetime = MEASURE_LIFETIME; loss = ConstantCaliforniaLoss; shareinsured = 0.6; m = 10000; tau_ii = ownriskreduction; tau_ij = neighborriskreduction; NumNb = avgneighbors; i = DISCOUNT_RATE; crra = 2
  # END DEBUG
  
  if (crra == 0) {
    print("Risk neutral calculation")
    ###### Risk neutral cost effectiveness (dynamic)
    ## Conditional-on-fire losses without mitigation
    L1 <- (pD + tau_ii + NumNb * tau_ij) * loss # annual
    PDV1 <- get_PDV(L1, lifetime = lifetime, discount_rate = i)
    
    ## Conditional-on-fire losses with mitigation by self AND neighbors
    L2 <- (pD) * loss # annual
    PDV2 <- get_PDV(L2, lifetime = lifetime, discount_rate = i)
    
    pf_RN_est <- m / (PDV1 - PDV2)
    return(pf_RN_est)
  } else { # if crra > 0
    print("Expected utility calculation")
    ##### Expected utility (static approximation)
    Li <- loss * shareinsured
    Lu <- loss * (1 - shareinsured)
    
    discounts <- 1 / (rep(1 + i, lifetime)^(0:(lifetime - 1)))
    delta <- mean(discounts) # discount all losses as if they occur at midpoint of period
    LiD <- Li * delta
    LuD <- Lu * delta
    
    # Grid search to find the lowest fire probability that justifies mitigation
    # This is quite fast, since it's all Vectorize
    df <- data.frame(pF = seq(0, 1 / lifetime, 0.000001)) %>% ## Have to stop at 1/lifetime because of simplification that assumes multiple loss outcomes don't occur
      mutate(
        ## Expected utility without mitigation (p1 * LiD is the actuarily fair premium)
        p1 = lifetime * pF * (pD + tau_ii + NumNb * tau_ij), ## Convert annual risk to lifetime risk to accident risk (simplification: Multiple loss outcomes are assumed not to occur (treated as 1 loss)).
        EU1 = p1 * u(w0 - p1 * LiD - LuD, gamma = crra) + (1 - p1) * u(w0 - p1 * LiD, gamma = crra), 
        ## Expected utility with mitigation by self AND neighbors
        p2 = lifetime * pF * pD, ## Convert annual risk to lifetime risk to accident risk (simplification: Multiple loss outcomes are assumed not to occur (treated as 1 loss)).
        EU2 = p2 * u(w0 - p2 * LiD - LuD - m, gamma = crra) + (1 - p2) * u(w0 - p2 * LiD - m, gamma = crra),
        ## Difference
        mitigationadvantage = EU2 - EU1,
        ## Check for negative consumption: wealth has to be greater than insurance premium + losses in bad state of the world
        neg_cons_check = w0 > p1 * LiD + LuD & w0 > p2 * LiD + LuD + m
      )
    stopifnot(all(df$neg_cons_check)) # ensure losses do not exceed wealth in any considered scenario
    
    # Find the lowest fire probability that justifies mitigation (use >= so that we get 0 for mitigation cost = 0, just to match risk neutral calc)
    pF_EU_est <- df %>% filter(mitigationadvantage >= 0) %>% arrange(pF) %>% slice(1) %>% pull(pF)
    
    if (length(pF_EU_est) == 0) {
      print("No fire probability justifies investment")
      return(NA)
    } else {
      return(pF_EU_est)
    }
  }
}


## TEST
breakeven(
  NumNb = 3.5,
  pD = destprob,
  lifetime = MEASURE_LIFETIME,
  loss = ConstantCaliforniaLoss,
  shareinsured = 1,
  m = 15660,
  tau_ii = ownriskreduction,
  tau_ij = neighborriskreduction,
  i = DISCOUNT_RATE,
  crra = 5
)

# TABLE: Break-even wildfire hazard under alternative cost scenarios ----

# Cost estimates (after converting NAHB estimates to 2018 dollars from 2020)
# Convert NAHB estimates to 2018 dollars from 2020
# https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator
costestimates_with_source <- data.frame(
  source = c("HE-Low", "NAHB-Low", "HE", "NAHB-High", "HE"),
  costs = round(c(0, 7633.79, 15660, 28552.99, 62760))
)

breakeven_tbl <- expand_grid(cost = costestimates_with_source$costs, 
                             shareinsured = c(1, 0.67, 0.33), 
                             riskaversion = c(2, 5))

# Set CRRA = 0 if full insurance
breakeven_tbl <- breakeven_tbl %>%
  mutate(riskaversion = if_else(shareinsured == 1, 0, riskaversion)) %>%
  distinct(cost, shareinsured, riskaversion)

# Use mapply to apply all arguments sequentially (alternative would be something like rowwise)
breakeven_tbl$pF <- mapply(breakeven, 
                           m = breakeven_tbl$cost, 
                           shareinsured = breakeven_tbl$shareinsured,
                           crra = breakeven_tbl$riskaversion,
                           MoreArgs = list(
                             NumNb = avgneighbors, # hold this constant at sample average
                             pD = destprob,
                             lifetime = MEASURE_LIFETIME,
                             loss = ConstantCaliforniaLoss,
                             tau_ii = ownriskreduction,
                             tau_ij = neighborriskreduction,
                             i = DISCOUNT_RATE))

breakeven_tbl_wide <- breakeven_tbl %>%
  mutate(pF = sprintf("%0.2f", 100 * pF)) %>% # Convert to rounded percentage
  pivot_wider(
    names_from = c("shareinsured", "riskaversion"),
    values_from = "pF",
    names_prefix = "cutoff"
  ) %>%
  merge(costestimates_with_source, by.x = "cost", by.y = "costs") %>%
  mutate(cost = format(as.numeric(cost), nsmall = 0, big.mark = ",")) %>%
  select(cost, source, everything())

colnames(breakeven_tbl_wide) <- NULL

kbl(breakeven_tbl_wide, 
    format = "latex",
    booktabs = T,
    escape = F,
    linesep = "",
    align = c("llccccc"),
    digits = c(0, 0, 2, 2, 2, 2)) %>%
  add_header_above(c("Cost Estimate ($)" = 1, "Source" = 1, "Break-even Wildfire Hazard (%)" = 5)) %>%
  add_header_above(c(" " = 2, "CRRA" = 1, "$\\\\gamma = 2$" = 1, "$\\\\gamma = 5$" = 1, "$\\\\gamma = 2$" = 1, "$\\\\gamma = 5$" = 1), escape = F) %>%
  add_header_above(c(" " = 1, "Insured \\\\%" = 1, "100" = 1, "67" = 2, "33" = 2), escape = F) %>%
  kableExtra::group_rows("Panel A. New Home", start_row = 1, end_row = 4, bold = F, italic = T) %>%
  kableExtra::group_rows("Panel B. Retrofit", start_row = 5, end_row = 5, bold = F, italic = T) %>%
  write(file = "results/costeff.tex")



# Set up data to be used for cost effectiveness and welfare scenarios
# (Basically, keep only the variables we need)
sample_scenarios <- samplehomes %>%
  select(ImportParcelID, BuildingOrImprovementNumber, FIPS, combinedyear, PropertyZip, street, streetID, tract, block, sqfeet, neighbors_in_30m_centroid, NeighborSqFt, persqftlosses, hazuslosses, GarageCostTotal, wildfire_hazard, wildfire_hazard_pct)

# # Adapt a per-square-foot specification for mitigation costs
sample_scenarios <- sample_scenarios %>%
  mutate(
    mitigation_cost = (15660 / 2500) * sqfeet,
    mitigation_cost_retrofit = (62760 / 2500) * sqfeet
  ) ## HE mid scenario for 2500 sq ft home is 15560

# Compute losses for each home based on square footage
sample_scenarios <- sample_scenarios %>% mutate(thishouselosses = sqfeet * persqftlosses + GarageCostTotal + 150000)

# Compute total neighbor losses based on square footage
# If missing, replace with 0
sample_scenarios <- sample_scenarios %>% mutate(NeighborSqFt = replace_na(NeighborSqFt, 0))

sample_scenarios <- sample_scenarios %>%
  mutate(neighborlosses = NeighborSqFt * persqftlosses + neighbors_in_30m_centroid * (GarageCostTotal + 150000))

# Note: scatterplot used constant losses, costant costs, but per-home wildfire hazard

# Calculate private benefits of mitigation ----

privatebenefit <- function(pF, loss, 
                           pD = destprob, 
                           tau_ii = ownriskreduction, 
                           lifetime = MEASURE_LIFETIME, i = DISCOUNT_RATE) {
  # Note: all inputs can be length 1 or length n
  
  ## Expected losses without mitigation
  EL1 <- pF * (pD + tau_ii) * loss # annual
  PDV1 <- get_PDV(EL1, lifetime = lifetime, discount_rate = i)
  
  ## Expected losses with mitigation
  EL2 <- pF * pD * loss # annual
  PDV2 <- get_PDV(EL2, lifetime = lifetime, discount_rate = i)
  
  return(PDV1 - PDV2)
}


# Calculate external benefits of mitigation ----

externalbenefit <- function(pF, NbLoss, 
                            pD = destprob,
                            tau_ij = neighborriskreduction, NumNb = avgneighbors, 
                            lifetime = MEASURE_LIFETIME, i = DISCOUNT_RATE
) {
  # NbLoss: Total losses for neighbors ()
  
  ## Expected neighbor losses without mitigation
  EL1 <- pF * (pD + tau_ij) * NbLoss # annual
  PDV1 <- get_PDV(EL1, lifetime = lifetime, discount_rate = i)
  
  ## Expected neighbor losses with mitigation
  EL2 <- pF * pD * NbLoss # annual
  PDV2 <- get_PDV(EL2, lifetime = lifetime, discount_rate = i)
  
  return(PDV1 - PDV2)
}

## Calculate per-home net benefits ----

sample_scenarios <- sample_scenarios %>% 
  mutate(private_benefit = privatebenefit(pF = wildfire_hazard, loss = thishouselosses))

sample_scenarios <- sample_scenarios %>% 
  mutate(external_benefit = externalbenefit(pF = wildfire_hazard, NbLoss = neighborlosses))

sample_scenarios <- sample_scenarios %>%
  mutate(netbenefit = private_benefit + external_benefit - mitigation_cost,
         netbenefit_retrofit = private_benefit + external_benefit - mitigation_cost_retrofit)

# Descriptive statistics for sampled homes ----

# All homes
vardist_all <- datasummary(combinedyear + sqfeet1000 + neighbors_in_30m_centroid + wildfire_hazard_pct + persqftlosses ~ 
                             Mean + SD + P5 + Median + P95,
                           data = samplehomes,
                           fmt = fmt_significant(3),
                           output = "data.frame") 

# Pre-1998 homes
vardist_pre1998 <- datasummary(combinedyear + sqfeet1000 + neighbors_in_30m_centroid + wildfire_hazard_pct + persqftlosses ~ 
                                 Mean + SD + P5 + Median + P95,
                               data = samplehomes %>% filter(combinedyear < 1998),
                               fmt = fmt_significant(3),
                               output = "data.frame")


# Post-1998 homes
vardist_post1998 <- datasummary(combinedyear + sqfeet1000 + neighbors_in_30m_centroid + wildfire_hazard_pct + persqftlosses ~ 
                                  Mean + SD + P5 + Median + P95,
                                data = samplehomes %>% filter(combinedyear >= 1998),
                                fmt = fmt_significant(3),
                                output = "data.frame")

# Combine
vardist <- bind_rows(vardist_all, vardist_pre1998, vardist_post1998)

# Use the data dictionary to rename variables
vardist <- vardist %>% 
  mutate(` ` = dict_names[as.character(` `)])


counts <- tribble(
  ~` `, ~Mean, # Match names from vardist
  "Counties", comma(n_distinct(samplehomes$FIPS)),
  "Census blocks", comma(n_distinct(samplehomes$block)))


sum_df <- bind_rows(vardist, counts)
sum_df[is.na(sum_df)] <- "" # Replace NA with empty string



kbl(sum_df, 
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    align = "lccccc",
    booktabs = T) %>%
  group_rows(sprintf("Panel A. All (Homes = %s)", comma(nrow(samplehomes))), 
             start_row = 1, end_row = 5, bold = F, italic = T) %>%
  group_rows(sprintf("Panel B. Pre-1998 (Homes = %s)", comma(nrow(samplehomes %>% filter(combinedyear < 1998)))), 
             start_row = 6, end_row = 10, bold = F, italic = T) %>%
  group_rows(sprintf("Panel C. 1998 and later (Homes = %s)", comma(nrow(samplehomes %>% filter(combinedyear >= 1998)))), 
             start_row = 11, end_row = 15, bold = F, italic = T) %>%
  group_rows("Panel D. Areas represented", start_row = 16, end_row = 17, bold = F, italic = T) %>%
  write(file = "results/welfare-descriptive-statistics.tex")

### TODO: Why do we only have 50 counties?

## Correlations table
dict_names[c("persqftlosses", "sqfeet1000", "wildfire_hazard_pct", "neighbors_in_30m_centroid")]

corr_df <- datasummary_correlation(sample_scenarios %>% 
                                     select(`(1) Square feet (1000s)` = sqfeet,
                                            `(2) Neighbors in 30m` = neighbors_in_30m_centroid,
                                            `(3) Wildfire hazard (\\%)` = wildfire_hazard_pct, 
                                            `(4) Losses per sf` = persqftlosses),
                                   use = "complete.obs",
                                   fmt = 2,
                                   output = "data.frame")

colnames(corr_df) <- c(" ", "(1)", "(2)", "(3)", "(4)")

kbl(corr_df, 
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    align = "lcccc",
    booktabs = T,
    digits = c(0, 2, 2, 2, 2)) %>%
  add_header_above(c(" " = 1, "Correlation" = 4)) %>%
  write(file = "results/correlations.tex")



# Calculated per-home benefits fixing size of home ----
# But allow it to vary by wildfire hazard


# FIGURE: Cost effectiveness - Scatterplot of average net benefits by wildfire hazard and number of neighbors ----


# Compute breakeven WF for all values of near neighors
make_costeff_plot <- function(this_df, this_mitigation_cost = 15660, nb_lims = c(-15, 20), pF_lims = c(0, 0.02), neighbor_lims = c(0, 4)) {
  
  # this_df <- sample_scenarios
  # mitigation_cost = 15660; nb_lims = c(-15, 20); pF_lims = c(0, 0.02); neighbor_lims = c(0, 4)
  # END DEBUG
  
  # Calculated per-home benefits fixing size of home, but allow it to vary by hazard
  this_df <- this_df %>% 
    mutate(private_benefit_fixsize = privatebenefit(pF = wildfire_hazard, loss = ConstantCaliforniaLoss),
           external_benefit_fixsize = externalbenefit(pF = wildfire_hazard, NbLoss = neighbors_in_30m_centroid * ConstantCaliforniaLoss),
           netbenefit_fixsize = private_benefit_fixsize + external_benefit_fixsize - this_mitigation_cost)
  
  
  # Group by area (tract, previously zip)
  area_averages <- this_df %>%
    group_by(tract) %>%
    summarize(
      wildfire_hazard = oob_squish(mean(wildfire_hazard), pF_lims),
      neighbors_in_30m_centroid = oob_squish(mean(neighbors_in_30m_centroid), neighbor_lims),
      netbenefit_fixsize_1000s = oob_squish(mean(netbenefit_fixsize) / 1000, range = nb_lims),
      n_homes = n()
    )
  
  # Compute breakeven line
  breakeven_line <- data.frame(neighbors_in_30m_centroid = seq(0, 4, 0.01)) %>%
    mutate(breakeven_pF = oob_squish(breakeven(m = this_mitigation_cost, loss = ConstantCaliforniaLoss, NumNb = neighbors_in_30m_centroid), range = pF_lims))
  
  p <- ggplot(area_averages, aes(x = wildfire_hazard, y = neighbors_in_30m_centroid)) +
    geom_point(aes(size = n_homes, fill = netbenefit_fixsize_1000s), colour = "black", shape = 21) + 
    geom_line(data = breakeven_line, aes(x = breakeven_pF, y = neighbors_in_30m_centroid), linewidth = 0.75, color = "black", linetype = "longdash") +
    scale_x_continuous(name = "Annual Wildfire Probability", labels = percent) + 
    scale_y_continuous(name = "Average Near Neighbors") + 
    scale_size_continuous(range = c(1, 2.5)) +
    scale_fill_gradient2(low = "#ef8a62", mid = "#f7f7f7", high = "#67a9cf", limits = nb_lims, breaks = seq(nb_lims[1], nb_lims[2], 5)) + 
    theme_minimal() + 
    theme(legend.position = "bottom") + 
    guides(size = "none", fill = guide_colorbar(
      title = "Net reduction in expected cost ($1,000)",
      title.position = "top",
      title.hjust = 0.5,
      barwidth = 12,
    ))
  p
}

make_costeff_plot(sample_scenarios, this_mitigation_cost = 15660)

save_plot(plot = make_costeff_plot(sample_scenarios, this_mitigation_cost = 7634),
          filename = file.path(RES, "breakeven_plot_7634.pdf"), base_height = 4.75)

save_plot(plot = make_costeff_plot(sample_scenarios, this_mitigation_cost = 15660),
          filename = file.path(RES, "breakeven_plot_15660.pdf"), base_height = 4.75)

save_plot(plot = make_costeff_plot(sample_scenarios, this_mitigation_cost = 28553),
          filename = file.path(RES, "breakeven_plot_28553.pdf"), base_height = 4.75)

save_plot(plot = make_costeff_plot(sample_scenarios, this_mitigation_cost = 62760),
          filename = file.path(RES, "breakeven_plot_62760.pdf"), base_height = 4.75)


## To report in text: points on the break even threshold
data.frame(neighbors_in_30m_centroid = seq(0, 4, 1)) %>%
  mutate(breakeven_pF = breakeven(m = 15660, loss = ConstantCaliforniaLoss, NumNb = neighbors_in_30m_centroid)) %>%
  write_csv(file.path(RES, "text_numbers_threshold.csv"))



# WELFARE EFFECTS OF POLICIES ------


# Theta of 0.51 yields 40% takeup among those who have positive private benefit
sample_scenarios %>% filter(private_benefit * 0.51 >= mitigation_cost) %>% nrow() / 
  sample_scenarios %>% filter(private_benefit >= mitigation_cost) %>% nrow()

benchmarktheta <- 0.5 # assumed inattention parameter based on above

# Calculate perceived (private) net benefits assuming benchmark theta
sample_scenarios <- sample_scenarios %>% 
  mutate(perceived_benefit = private_benefit * benchmarktheta,
         perceived_netbenefit = perceived_benefit - mitigation_cost,
         perceived_netbenefit_retrofit = perceived_benefit - mitigation_cost_retrofit)

# FIGURE: Heterogeneity in social net benefits by wildfire risk and new build / retrofit -----

binbreaks <- c(seq(0, 0.02, 0.0025), Inf)
binlabels <- as.character(100 * binbreaks[1:(length(binbreaks) - 1)])
binlabels[length(binlabels)] <- paste0(binlabels[length(binlabels)], "+")

# Create cut variable by wildfire risk
sample_scenarios <- sample_scenarios %>% 
  mutate(wildfire_hazard_cut = cut(wildfire_hazard, breaks = binbreaks, include.lowest = T, labels = binlabels))

p_mid <- ggplot(data = sample_scenarios, aes(x = wildfire_hazard_cut, y = netbenefit)) +
  geom_hline(yintercept = 0) +
  geom_boxplot(varwidth = FALSE, outliers = F) +
  scale_x_discrete(name = "Annual Wildfire Probability (%)") +
  scale_y_continuous(name = "Net Social Benefit", labels = comma, n.breaks = 10) +
  coord_cartesian(ylim = c(-75000, 125000)) + 
  ggtitle("As New Construction") +
  theme_minimal_hgrid() + 
  theme(plot.margin = margin(t = 7, r = 0, b = 7, l = 7))

# Note: previous version fo this had different bounds, but possibly in a way that misrepresented the data a bit.

p_retrofit <- p_mid %+% aes(y = netbenefit_retrofit) %+% 
  ggtitle("As Retrofit") %+% 
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        plot.margin = margin(t = 7, r = 7, b = 7, l = 0))

save_plot(plot = plot_grid(p_mid, p_retrofit, ncol = 2, align = "v"), 
          filename = file.path(RES, "boxes_netbenefit.pdf"), base_height = 7)

# Calculate welfare effects of policies ----

# Calculate welfare based on adoption status, private benefits, external benefits, costs, and subsidy
calculate_welfare <- function(adopt, private_benefits, external_benefits, costs, subsidy = 0) {
  # adopt: vector of T/F adoption
  # private_benefits: vector of private benefits (if adopt = T)
  # external_benefits: vector of external benefits (if adopt = T)
  # costs: vector of costs (if adopt = T)
  # subsidy: scalar or vector of subsidies
  
  # adopt <- df$mitigate_county; private_benefits <- df$private_benefit; external_benefits <- df$external_benefit; costs <- df$cost; subsidy <- 0 # Checks out
  # END DEBUG
  
  df <- data.frame(
    adopt = adopt,
    private_benefits = private_benefits,
    external_benefits = external_benefits,
    costs = costs,
    subsidy = subsidy
  )
  
  out <- df %>% 
    filter(adopt == TRUE) %>%
    summarize(
      netsocial = sum(private_benefits + external_benefits - costs) / 1e6,
      netprivate = sum(private_benefits - costs) / 1e6,
      external = sum(external_benefits) / 1e6,
      adopters = n(),
      adopters_prop = n() / nrow(df),
      bad_adopters = sum(private_benefits + external_benefits - costs < 0),
      badadopters_prop = bad_adopters / nrow(df),
      fiscalcost = sum(subsidy) / 1e6
    )
  
  # Return as list
  out %>% as.list()
}

model_all_policies <- function(df, theta = 0.5, include_inframarginal_for_group = TRUE) {
  # df <- sample_scenarios; theta = 0.5; include_inframarginal_for_group = TRUE
  # END DEBUG
  
  df <- df %>% rename(cost = mitigation_cost)
  
  # Calculate welfare for all policies
  welfare_list <- list()
  
  # (Re-)calculate perceived and net benefits
  df <- df %>% mutate(
    perceived_netbenefit = private_benefit * theta - cost,
    netbenefit = private_benefit + external_benefit - cost
  )
  
  
  # Benchmarks
  ## Mitigation w/out policy
  df <- df %>% mutate(mitigate_nopolicy = perceived_netbenefit >= 0)
  
  welfare_list[["No policy"]] <- with(df, calculate_welfare(mitigate_nopolicy, private_benefit, external_benefit, cost))
  
  
  ## Perfectly targeted standard
  df <- df %>% mutate(mitigate_perfectstandard = netbenefit >= 0)
  
  
  welfare_list[["Perfect standard"]] <- with(df, calculate_welfare(mitigate_perfectstandard, private_benefit, external_benefit, cost))
  
  ## Perfectly differentiated subsidy
  df <- df %>% 
    mutate(perf_subsidy = case_when( # Only offer subsidies for people who would are truly on margin and who the regulator would want to mitigate
      perceived_netbenefit < 0 & netbenefit > 0 ~ -1 * perceived_netbenefit,
      TRUE ~ 0),
      mitigate_perf_subsidy = perceived_netbenefit + perf_subsidy >= 0)
  
  welfare_list[["Perfect subsidy"]] <- with(df, calculate_welfare(mitigate_perf_subsidy, private_benefit, external_benefit, cost, subsidy = perf_subsidy))
  
  # Building codes 
  
  ## Targeting based only on wildfire hazard
  
  # Identify optimal hazard requirement
  optimal_pF <- optimize(welfare_given_hazard, interval = c(0, 0.1), maximum = T, df = df)$maximum
  
  # Calculate mitigation given optimal pF require(package)
  df <- df %>% mutate(mitigate_hazard_targeted = wildfire_hazard >= optimal_pF)
  
  welfare_list[["Hazard only"]] <- with(df, calculate_welfare(mitigate_hazard_targeted, private_benefit, external_benefit, cost))
  
  ## County-level targeting
  # Group counties, compute whether forcing mitigation would be welfare improving, then decide
  
  df <- df %>% mutate(mitigate_county = get_mitigation_group(df, FIPS, include_inframarginal_for_group))
  welfare_list[["County"]] <- with(df, calculate_welfare(mitigate_county, private_benefit, external_benefit, cost))
  
  ## Census block
  df <- df %>% mutate(mitigate_block = get_mitigation_group(df, block, include_inframarginal_for_group))
  welfare_list[["Census block"]] <- with(df, calculate_welfare(mitigate_block, private_benefit, external_benefit, cost))
  
  ## Street
  df <- df %>% mutate(mitigate_street = get_mitigation_group(df, streetID, include_inframarginal_for_group))
  welfare_list[["Street"]] <- with(df, calculate_welfare(mitigate_street, private_benefit, external_benefit, cost))
  
  # Subsidies
  
  ## Uniform subsidy
  # This is the Allcott Taubinsky approach -- find the 1% of people with net benefits closest to zero, then offer them the average of their perceived benefits
  # unif_subsidy <- df %>% 
  #   arrange(abs(netbenefit)) %>% 
  #   slice(1:round(0.01 * nrow(.))) %>% # 
  #   summarize(unif_subsidy = -mean(perceived_netbenefit)) %>%
  #   pull(unif_subsidy)
  
  
  # Optimize approach (similar to but faster than a grid search) gets a different answer than Allcott Taubinsky so we prefer it
  unif_subsidy <- optimize(welfare_given_subsidy, interval = c(0, 100000), df = df, maximum = T)$maximum
  
  df <- df %>% mutate(unif_subsidy = unif_subsidy,
                      mitigate_unif_subsidy = perceived_netbenefit + unif_subsidy >= 0)
  
  welfare_list[["Uniform"]] <- with(df, calculate_welfare(mitigate_unif_subsidy, private_benefit, external_benefit, cost, subsidy = unif_subsidy))
  
  ## Optimal per square foot subsidy - everyone gets offered the same amount per square foot
  psf_subsidy_psf <- df %>%
    arrange(abs(netbenefit / sqfeet)) %>%
    slice(1:round(0.01 * nrow(.))) %>%
    summarize(psf_subsidy_psf = -mean(perceived_netbenefit / sqfeet)) %>%
    pull(psf_subsidy_psf)
  
  df <- df %>%
    mutate(psf_subsidy = psf_subsidy_psf * sqfeet,
           mitigate_psf_subsidy = perceived_netbenefit + psf_subsidy >= 0)
  
  # Optimize could work here, but since Allcott Taubinsky is quite close for persf subsidy we don't use it
  # psf_subsidy <- optimize(welfare_given_subsidy, interval = c(0, 10), maximum = T, df = df, persf = T)$maximum
  
  welfare_list[["Per square foot"]] <- with(df, calculate_welfare(mitigate_psf_subsidy, private_benefit, external_benefit, cost, subsidy = psf_subsidy))
  
  
  # Compile all
  welfare_df <- bind_rows(welfare_list, .id = "policy")
  welfare_df
}

# Given a wildfire hazard requirement, return social welfare (for choosing optimal)
welfare_given_hazard <- function(this_hazard, df) {
  df <- df %>% mutate(mitigate_this_hazard = wildfire_hazard >= this_hazard)
  
  out <- with(df, calculate_welfare(mitigate_this_hazard, private_benefit, external_benefit, cost))
  out$netsocial
}

# Given a subsidy, return social welfare (for choosing optimal)
welfare_given_subsidy <- function(this_subsidy, df, persf = F) {
  if (persf == FALSE) {
    df <- df %>% mutate(mitigate_this_subsidy = perceived_netbenefit + this_subsidy >= 0)
  } else {
    df <- df %>% mutate(mitigate_this_subsidy = perceived_netbenefit + this_subsidy * sqfeet >= 0)
  }
  
  out <- with(df, calculate_welfare(mitigate_this_subsidy, private_benefit, external_benefit, cost, subsidy = this_subsidy * sqfeet))
  out$netsocial
}



get_mitigation_group <- function(df, grouping_var, include_inframarginal_for_group = T) {
  df <- df %>% group_by( {{ grouping_var }})
  
  if (!include_inframarginal_for_group) { # Allow planner to exclude marginal adopters
    df <- df %>% mutate(mitigate_group = sum(netbenefit[mitigate_nopolicy == FALSE]) >= 0)
  } else {
    df <- df %>% mutate(mitigate_group = sum(netbenefit) >= 0)
  }
  
  df <- df %>%
    ungroup() %>%
    mutate(mitigate_group = mitigate_group | mitigate_nopolicy) # Mitigation happens if county requires it or if people do it voluntarily
  
  df$mitigate_group
}

# EXHIBITS ----


## Main table (policies.tex) ----
welfare_df <- model_all_policies(sample_scenarios)

welfare_df_display <- welfare_df %>% 
  transmute(policy, netsocial, netprivate, external, adopters_prop = adopters_prop * 100, badadopters_prop = badadopters_prop * 100, fiscalcost)

kbl(welfare_df_display,
    col.names = linebreak(c(
      "Policy",
      "Social Net\nBenefit\n(\\$M)",
      "Private Net\nBenefit\n(\\$M)",
      "External\nBenefit\n(\\$M)",
      "Total\nAdoption\nRate",
      "Inefficient\nAdoption\nRate",
      "Fiscal\nCost\n (\\$M)"
    ), align = "c"),
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    digits = c(NA, 0, 0, 0, 1, 1, 0),
    booktabs = T
) %>%
  group_rows("Panel A. Benchmarks", 1, 3, bold = F, italic = T) %>%
  group_rows("Panel B. Building Codes", 4, 7, bold = F, italic = T) %>%
  pack_rows("Panel C. Subsidies", 8, 9, bold = F, italic = T) %>%
  write(file.path(RES, "policies.tex"))



## Appx - Welfare under different policies with alternative thetas ----

# Try alternative thetas
possible_thetas <- seq(0.25, 1, 0.05)
welfare_df_thetas <- lapply(possible_thetas, function(theta) {
  print(theta)
  model_all_policies(sample_scenarios, theta = theta) %>% mutate(theta = theta)
}) %>% rbindlist()

# Make a figure that shows how net benefits change with theta among No policy, perfectly targeted subsidy, street, and uniform subsidy per square foot
p_theta <- ggplot(data = welfare_df_thetas %>% 
                    filter(policy %in% c("No policy", "Perfect standard", "Street", "Per square foot")),
                  aes(x = theta, y = netsocial, color = policy, shape = policy)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(name = expression(paste("Degree of Misperception (", theta, ")")), n.breaks = 7) +
  scale_y_continuous(name = "Net Social Benefit ($M)", labels = comma) +
  scale_color_viridis_d(name = NULL) + 
  scale_shape_discrete(name = NULL) + 
  theme_minimal_hgrid()

save_plot(file.path(RES, "policy_sensitivity.pdf"), p_theta, base_height = 4.5)

## Appx: Policies where policymaker excludes external benefits -----
# nospillover_policies.tex
welfare_df_noext <- model_all_policies(sample_scenarios %>% mutate(external_benefit = 0))

welfare_df_noext_display <- welfare_df_noext %>% 
  transmute(policy, netsocial, netprivate, external, adopters_prop = adopters_prop * 100, badadopters_prop = badadopters_prop * 100, fiscalcost)

kbl(welfare_df_noext_display,
    col.names = linebreak(c(
      "Policy",
      "Social Net\nBenefit\n(\\$M)",
      "Private Net\nBenefit\n(\\$M)",
      "External\nBenefit\n(\\$M)",
      "Total\nAdoption\nRate",
      "Inefficient\nAdoption\nRate",
      "Fiscal\nCost\n (\\$M)"
    ), align = "c"),
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    digits = c(NA, 0, 0, 0, 1, 1, 0),
    booktabs = T
) %>%
  pack_rows("Panel A. Benchmarks", 1, 3, bold = F, italic = T) %>%
  pack_rows("Panel B. Building Codes", 4, 7, bold = F, italic = T) %>%
  pack_rows("Panel C. Subsidies", 8, 9, bold = F, italic = T) %>%
  write(file.path(RES, "nospillover_policies.tex"))


## TABLE: compare version of building code policies that do not and do account for inframarginal adopters 
welfare_df_noinfra <- model_all_policies(sample_scenarios, include_inframarginal_for_group = F)

welfare_df_noinfra_display <- bind_rows(
  welfare_df %>% filter(policy %in% c("County", "Census block", "Street")),
  welfare_df_noinfra_displayB <- welfare_df_noinfra %>% filter(policy %in% c("County", "Census block", "Street"))) %>%
  transmute(policy, netsocial, netprivate, external, adopters_prop = adopters_prop * 100, badadopters_prop = badadopters_prop * 100, fiscalcost)



kbl(welfare_df_noinfra_display,
    col.names = linebreak(c(
      "Policy",
      "Social Net\nBenefit\n(\\$M)",
      "Private Net\nBenefit\n(\\$M)",
      "External\nBenefit\n(\\$M)",
      "Total\nAdoption\nRate",
      "Inefficient\nAdoption\nRate",
      "Fiscal\nCost\n (\\$M)"
    ), align = "c"),
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    digits = c(NA, 0, 0, 0, 1, 1, 0),
    booktabs = T
) %>%
  pack_rows("Panel A. Targeting Average Benefit", 1, 3, bold = F, italic = T) %>%
  pack_rows("Panel B. Targeting Marginal Benefit", 4, 6, bold = F, italic = T) %>%
  write(file.path(RES, "marginal_standards.tex"))




## Appx - Decompose variance in wildfire hazard by spatial units -----

# For each variable and street grouping, compute the within and between variance
compute_within_var <- function(this_data, varname, groupvar, min_in_group = 2) {
  # this_data = data
  # varname = "combinedyear"
  # groupvar = "streetXfire"
  # min_in_group = 2
  
  this_data <- this_data %>% add_count(get(groupvar), name = "homes_in_group")
  
  # Limit based on min in group
  this_data <- this_data %>% filter(homes_in_group >= min_in_group)
  
  # Fit model with just an intercept (for total variance), then model with group var (for within variance)
  fit1 <- feols(as.formula(sprintf("%s ~ 1", varname)), data = this_data)
  fit2 <- feols(as.formula(sprintf("%s ~ 1 | %s", varname, groupvar)), data = this_data)
  
  return(list(
    varname = varname,
    groupvar = groupvar,
    nhomes = nrow(this_data),
    ngroups = n_distinct(this_data[[groupvar]]),
    overallvar = var(residuals(fit1)),
    withinvar = var(residuals(fit2)),
    betweenvar = var(residuals(fit1)) - var(residuals(fit2)),
    withinpercent = 100 * var(residuals(fit2)) / var(residuals(fit1))
  ))
}

# Create a table 
within_var_tbl <- bind_rows(
  compute_within_var(sample_scenarios, "wildfire_hazard", "FIPS"),
  compute_within_var(sample_scenarios, "wildfire_hazard", "block"),
  compute_within_var(sample_scenarios, "wildfire_hazard", "streetID"),
)

within_var_tbl_display <- within_var_tbl %>%
  transmute(level = case_when(
    groupvar == "FIPS" ~ "County",
    groupvar == "block" ~ "Census block",
    groupvar == "streetID" ~ "Street"
  ), 
  ngroups = ngroups,
  non_singleton_homes = nhomes,
  overallvar = overallvar * 10000,
  withinvar = withinvar * 10000,
  betweenvar = betweenvar * 10000,
  withinpercent = withinpercent)

kbl(within_var_tbl_display,
    col.names = linebreak(c(
      "Grouping",
      "Units",
      "Non-\nsingletons",
      "Total",
      "Within",
      "Between",
      "Within\n(\\%)"
    ), align = "c"),
    format = "latex",
    escape = F,
    format.args = list(big.mark = ","),
    linesep = "",
    digits = c(NA, 0, 0, 3, 3, 3, 1),
    align = c("l", "c", "c", "c", "c", "c", "c"),
    booktabs = T
) %>%
  write(file.path(RES, "variancedecomp.tex"))


## Use a triangular distribution for theta (R5 comment) -----
# Creates correlated_theta.tex


model_random_theta <- function(df, Z) {
  # Mode of triangular distribution (c) depends on fire risk. Z scales the size of this dependence.
  
  # Z <- -25
  # END DEBUG
  # print(Z)
  
  df <- df %>% rename(cost = mitigation_cost)
  
  # a <- 0.4
  # b <- 0.95 # R5's requested params
  
  # Params that match R5's range but result in a mean of 0.5 or so, to match our other simulations
  a <- 0.2
  b <- 0.75
  
  c0 <- a + (b - a) / 2
  
  if (abs(Z * 0.005) > min(
    c0 - a,
    b - c0
  )) {
    return("Z out of range")
  }
  
  # Draw theta for everyone in sample using triangular distribution
  # c is an individual-specific mode that depends on wildfire risk
  # Alt: DescTools::Triangular? Not sure we can do individual-specific models as easily, though.
  df <- df %>% mutate(
    riskdeviation = if_else(wildfire_hazard > 0.01, 0.01, wildfire_hazard),
    riskdeviation = riskdeviation - 0.005,
    u = runif(n = n()),
    c = c0 + Z * riskdeviation,
    d = (c - a) / (b - a),
    theta = case_when(
      u < d ~ a + sqrt(u * (b - a) * (c - a)),
      u >= d ~ b - sqrt((1 - u) * (b - a) * (b - c))
    ))
  
  # print(summary(df$theta))
  
  # (Re-)calculate perceived and net benefits
  df <- df %>% mutate(
    perceived_netbenefit = private_benefit * theta - cost,
    netbenefit = private_benefit + external_benefit - cost
  )
  
  welfare_list <- list()
  
  # Benchmarks
  ## Mitigation w/out policy
  df <- df %>% mutate(mitigate_nopolicy = perceived_netbenefit >= 0)
  
  welfare_list[["No policy"]] <- with(df, calculate_welfare(mitigate_nopolicy, private_benefit, external_benefit, cost))
  
  ## Perfectly targeted standard
  df <- df %>% mutate(mitigate_perfectstandard = netbenefit >= 0)
  
  welfare_list[["Perfect standard"]] <- with(df, calculate_welfare(mitigate_perfectstandard, private_benefit, external_benefit, cost))
  
  # Identify optimal hazard requirement
  optimal_pF <- optimize(welfare_given_hazard, interval = c(0, 0.1), maximum = T, df = df)$maximum
  
  # Calculate mitigation given optimal pF required
  df <- df %>% mutate(mitigate_hazard_targeted = wildfire_hazard >= optimal_pF)
  
  welfare_list[["Hazard only"]] <- with(df, calculate_welfare(mitigate_hazard_targeted, private_benefit, external_benefit, cost))
  
  ## Census block standard
  df <- df %>% mutate(mitigate_block = get_mitigation_group(df, block))
  welfare_list[["Census block"]] <- with(df, calculate_welfare(mitigate_block, private_benefit, external_benefit, cost))
  
  ## Optimal per square foot subsidy - everyone gets offered the same amount per square foot
  psf_subsidy_psf <- df %>%
    arrange(abs(netbenefit / sqfeet)) %>%
    slice(1:round(0.01 * nrow(.))) %>%
    summarize(psf_subsidy_psf = -mean(perceived_netbenefit / sqfeet)) %>%
    pull(psf_subsidy_psf)
  
  df <- df %>%
    mutate(psf_subsidy = psf_subsidy_psf * sqfeet,
           mitigate_psf_subsidy = perceived_netbenefit + psf_subsidy >= 0)
  
  # Optimize could work here, but since Allcott Taubinsky is quite close for persf subsidy we don't use it
  # psf_subsidy <- optimize(welfare_given_subsidy, interval = c(0, 10), maximum = T, df = df, persf = T)$maximum
  
  welfare_list[["Per square foot"]] <- with(df, calculate_welfare(mitigate_psf_subsidy, private_benefit, external_benefit, cost, subsidy = psf_subsidy))
  
  # Compile all
  welfare_df <- bind_rows(welfare_list, .id = "policy")
  
  welfare_df$Z = Z
  
  return(welfare_df)
}

# For each value of Z, run 200 replications and compute welfare (takes a while)
triangle_out_list <- pbmclapply(seq(-50, 50, 25), 
                                function(z) replicate(200, model_random_theta(df = sample_scenarios, Z = z), simplify = F),
                                mc.cores = 4)

triangle_out_df <- bind_rows(triangle_out_list)

# Within each Z and policy, compute averages and SDs of net benefits and adopter proportions
triangle_out_display <- triangle_out_df %>%
  group_by(Z, policy) %>%
  summarize(
    netsocial = sprintf("%s (%3.2f)", comma(mean(netsocial)), sd(netsocial)),
    adopters_prop = sprintf("%3.1f (%3.2f)", mean(adopters_prop * 100), sd(adopters_prop * 100))) %>%
  pivot_longer(c(netsocial, adopters_prop), names_to = "statistic", values_to = "value") %>%
  pivot_wider(names_from = c("policy"), values_from = "value") %>%
  arrange(rev(statistic), Z) %>%
  # mutate(Zdisplay = sprintf("%s [%0.3f, %0.3f]", Z, 0.675 - Z * 0.005, 0.675 + Z * 0.005)) %>% # For a = 0.4, b = 0.95
  # mutate(Zdisplay = sprintf("%s [%0.3f, %0.3f]", Z, 0.475 - Z * 0.005, 0.475 + Z * 0.005)) %>% # For a = 0.2, b = 0.75
  mutate(Zdisplay = Z) %>%
  ungroup() %>%
  select(Zdisplay, `No policy`, `Perfect standard`, `Hazard only`, `Census block`, `Per square foot`)

# Mode is a + pw
# a = 0.2
# w = 0.75 - 0.2 = 0.55
# p = ??

kbl(triangle_out_display,
    format = "latex",
    escape = F,
    col.names = linebreak(c("$\\gamma$", "No\npolicy", "Perfect\nstandard", "Building code\n(Hazard only)", "Building code\n(Census block)", "Subsidy \n(Per square foot)"), align = "c"),
    booktabs = T
) %>%
  pack_rows("Panel A. Social Net Benefit ($M)", 1, 5, bold = F, italic = T) %>%
  pack_rows("Panel B. Adoption Rate (%)", 6, 10, bold = F, italic = T) %>%
  write(file.path(RES, "correlated_theta.tex"))
